* Begin log file
capture log close sublog
log using "$projdir/log/4_qcew_build.txt", name(sublog) text replace

*-------------------------------------------------------------------------------
* Description: Clean QCEW data
* Author: Carl McPherson
*
* Updated: May 24, 2024
*-------------------------------------------------------------------------------

tempfile core

****************************************
* CLEAN UP LOCAL MINIMUM WAGES 
****************************************
use "$projdir/dta/build/src/mw_panel/hand_coded_geo_xwalk.dta", clear

merge 1:m statefips locality using "$projdir/dta/build/src/mw_panel/mw_substate_quarterly.dta", assert(3) nogen 

collapse (min) loc_min_mw = max_mw (max) loc_max_mw = max_mw, by(statefips countyfips quarterly_date)

save "$projdir/dta/build/cln/loc_mw_quaterly.dta", replace 

****************************************
* LOAD QCEW AND ADD MINIMUM WAGE INFO
****************************************
use "$projdir/dta/build/src/qcew_panel_raw.dta", clear

*Standardize county codes
gen statefips = real(substr(area_fips,1,2))
gen countyfips = real(substr(area_fips,3,4))

order area_fips year quarter statefips countyfips
drop if inlist(countyfips, 999) // Unidentified areas 
drop if inlist(statefips, 72, 78, .) //Puerto Rico, Virgin Islands or MSAs 
drop if inlist(statefips, 2, 15) // AK & HI 

* Add state minimum wage changes 
rename quarterly quarterly_date

merge m:1 statefips quarterly using "$projdir/dta/build/src/mw_panel/mw_state_quarterly" ///
	, keepusing(stateab max_mw) keep(1 3) nogen

merge m:1 statefips countyfips quarterly_date using "$projdir/dta/build/cln/loc_mw_quaterly.dta" ///
	, keep(1 3) nogen 

order stateab year quarter countyfips *_mw

replace loc_min_mw = max_mw if loc_min_mw == .
replace loc_max_mw = max_mw if loc_max_mw == .

order stateab countyfips year quarter loc_max_mw

****************************************
* FORMAT DATA
****************************************
* Balance panel
drop if quarterly_date == .

rename industry_code ind 
rename qtrly_estabs estabs 

gen area = statefips*1000 + countyfips

egen group_id = group(area ind)

tsset group_id quarterly_date 

gen employment = (month1 + month2 + month3)/3

gen earnings = log(avg_wkly_wage)
gen lnemp = log(employment)

egen min_emp = min(employment), by(group_id)

* Defined demean-ed outcomes
gen preperiod = inrange(quarterly_date,200,217)

egen pre_earn = mean(earnings) if preperiod == 1, by(group_id)
egen temp = min(pre_earn), by(group_id)
replace earnings = earnings - temp 
drop temp 

egen pre_emp = mean(lnemp) if preperiod == 1, by(group_id)
egen temp = min(pre_emp), by(group_id)
replace lnemp = lnemp - temp 
drop temp 

tempfile unbalanced  
save `unbalanced'

* Force panel to strictly balance 
summ quarterly
local min = `r(min)'
local max = `r(max)'

levels ind 
global inds  "`r(levels)'" 

distinct ind 
local distinct = `r(ndistinct)' 

keep area 
duplicates drop 

expand `max' - `min' + 1 
bys area: gen quarterly_date = `min' + _n -1   

* Expand by no. of unique industries 
expand `distinct'

gen ind = "" 
local iter = 0
foreach v in $inds {
	local iter = `=`iter'+1'
	bys area quarterly: replace ind =  "`v'" if _n == `iter'
}

egen group_id2 = group(area ind) 

merge 1:1 area ind quarterly using `unbalanced', assert(1 3) nogen 
drop group_id
rename group_id2 group_id 

tsset group_id quarterly_date
assert "`r(balanced)'" == "strongly balanced"

compress
save `core', replace 

****************************************
* ADD COUNTY NAMES AND POPULATION  
****************************************
import delimited "$projdir/dta/build/src/county_pop.csv", clear 

rename state statefips 
rename county countyfips
rename ctyname county 
rename popestimate20?? pop?? 

keep statefips countyfips county pop10 pop13  

merge 1:m statefips countyfips using `core', keep(2 3) nogen 
replace county = subinstr(county, " ", "", .)

tab ind if area == 53033

replace area = - 1 if area ==. & countyfips == -1 

replace employment = (month1 + month2 + month3)/3 if employment == .  

replace ind = "31" if ind == "31-33"
replace ind = "44" if ind == "44-45" 
destring ind, replace 

* Restrict to a county level
drop if countyf == 0

* Save data
save "$projdir/dta/build/cln/analysis_panel_cty", replace

* Close log file
log close sublog
